--- 
# Data settings: a `main.DataModuleFromConfig` entry with a batch size and
# one ADE20k dataset entry each for training and validation.
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 6
    train:
      target: taming.data.ade20k.ADE20kTrain
      params:
        # NOTE(review): training uses size 290 with crop_size 256, while
        # validation uses 256 for both. Presumably a resize-then-crop
        # augmentation; confirm against taming.data.ade20k.
        size: 290
        crop_size: 256
    validation:
      target: taming.data.ade20k.ADE20kValidation
      params:
        size: 256
        crop_size: 256
# Model settings: a `Net2NetTransformer` entry built from three nested
# target/params sub-configs: a GPT transformer, a first-stage VQModel read
# under the key `image`, and a conditioning-stage VQModel read under the key
# `segmentation`.
model:
  base_learning_rate: 4.5e-06
  target: taming.models.cond_transformer.Net2NetTransformer
  params:
    first_stage_key: image
    cond_stage_key: segmentation

    transformer_config:
      target: taming.modules.transformer.mingpt.GPT
      params:
        # Same value as the first stage's n_embed (4096) below.
        vocab_size: 4096
        # NOTE(review): presumably the combined length of the conditioning
        # and image token sequences; confirm against the GPT/Net2Net code.
        block_size: 512
        n_layer: 28
        n_head: 16
        n_embd: 1024
        attn_pdrop: 0.1
        resid_pdrop: 0.1

    # First stage: VQModel over 3-channel input/output with 4096 codebook
    # entries (n_embed).
    first_stage_config:
      target: taming.models.vqgan.VQModel
      params:
        embed_dim: 256
        n_embed: 4096
        ddconfig:
          double_z: false
          z_channels: 256
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 1, 2, 2, 4]
          num_res_blocks: 2
          attn_resolutions: [16]
          dropout: 0.0
        lossconfig:
          target: taming.modules.losses.DummyLoss

    # Conditioning stage: VQModel with the same ddconfig as the first stage
    # except in_channels/out_ch, which are 151 here (equal to
    # colorize_nlabels), and a smaller codebook (n_embed 1024).
    cond_stage_config:
      target: taming.models.vqgan.VQModel
      params:
        embed_dim: 256
        n_embed: 1024
        image_key: segmentation
        colorize_nlabels: 151
        ddconfig:
          double_z: false
          z_channels: 256
          resolution: 256
          in_channels: 151
          out_ch: 151
          ch: 128
          ch_mult: [1, 1, 2, 2, 4]
          num_res_blocks: 2
          attn_resolutions: [16]
          dropout: 0.0
        lossconfig:
          target: taming.modules.losses.DummyLoss